package com.study.feature.transform

import org.apache.spark.ml.feature.StopWordsRemover
import org.apache.spark.sql.SparkSession

/**
 * Feature transformation demo: StopWordsRemover.
 *
 * Builds a small DataFrame of pre-tokenized sentences, passes it through a
 * [[org.apache.spark.ml.feature.StopWordsRemover]] and prints the input next to
 * the filtered output.
 *
 * @author stephen
 * @date 2019-08-28 11:40
 */
object StopWordsRemoverDemo {

  /**
   * Runs the demo on a local Spark session and prints the transformed DataFrame.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      // getSimpleName on a Scala object yields the module class name, which ends
      // with '$'; strip it so the application shows up under a clean name.
      .appName(this.getClass.getSimpleName.stripSuffix("$"))
      .master("local[*]")
      .getOrCreate()

    try {
      spark.sparkContext.setLogLevel("warn")

      // Each row: an id and an already-tokenized sentence.
      val dataSet = spark.createDataFrame(Seq(
        (0, Seq("I", "saw", "the", "red", "balloon")),
        (1, Seq("Mary", "had", "a", "little", "lamb"))
      )).toDF("id", "raw")

      // No stop words are set explicitly, so the remover's default list is used.
      // A custom list can be supplied with setStopWords(...); a previously saved
      // remover can be restored with StopWordsRemover.load(path).
      val remover = new StopWordsRemover()
        .setInputCol("raw")
        .setOutputCol("filtered")

      // truncate = false so the full token arrays are printed.
      remover.transform(dataSet).show(false)
    } finally {
      // Always release the local Spark context, even if the transformation fails.
      spark.stop()
    }
  }
}
